home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Celestin Apprentice 5
/
Apprentice-Release5.iso
/
Source Code
/
PowerPlant
/
DMultiStringLocator
/
source
/
DMultiStringLocator.h
< prev
next >
Wrap
Text File
|
1996-07-02
|
4KB
|
121 lines
// ===========================================================================
// DMultiStringLocator.h
// ---------------------
// ©1996 Eric Gundrum, All rights reserved.
//
// The contents of this file may be freely altered and freely distributed
// in any form, provided this copyright statement is retained unaltered.
// Add your own changes below.
// ---------------------
//
// Based on source code provided in Practical Algorithms for Programmers,
// by Binstock and Rex, published in 1995 by Addison Wesley.
//
// Simultaneously search text for multiple strings using the Aho/Corasick
// algorithm.
//
#pragma once
// ANSI C++ headers
// #include <stdexcept>
// Mac Headers
#include <Types.h> // for Boolean
// PowerPlant Headers
#include <LList.h>
#include <LString.h>
#pragma mark --- DSearchString declarations ---
// ===========================================================================
// ----------- DSearchString declarations ----------
// ===========================================================================
// An abstract class containing the string and identifying the function
// called when this string is found.
class DSearchString : public LStr255
{
public:
    // --- construction / destruction ---------------------------------------
    // The default constructor does nothing beyond constructing the LStr255
    // base; the destructor is virtual so that concrete subclasses are
    // destroyed correctly through a DSearchString pointer.
    DSearchString() {}
    virtual ~DSearchString() {}

    // --- match notification -----------------------------------------------
    // Pure virtual: every concrete search string supplies its own handler,
    // which is the function called when this string is found.
    //   inOffset   location in the buffer of the LAST character of the
    //              recognized string.
    // NOTE(review): the meaning of the Boolean result is not visible in
    // this header; confirm against the caller in the implementation file.
    virtual Boolean ReportFound( long inOffset ) = 0;

private:
    // stop defaults: copy construction is declared inaccessible on purpose.
    // NOTE(review): copy assignment is not suppressed here; confirm whether
    // that is intended.
    DSearchString( const DSearchString &inOriginal );
};
#pragma mark --- DMultiStringLocator declarations ---
// ===========================================================================
// ----------- DMultiStringLocator declarations ----------
// ===========================================================================
// Searches a buffer for several strings simultaneously using the
// Aho/Corasick algorithm. The strings to look for are supplied as a list
// at construction time.
class DMultiStringLocator
{
public:
    virtual ~DMultiStringLocator();

    // Build a locator for a set of search strings.
    //   inStringList     list of search strings
    //   inAlphabetSize   size of BranchTable (defaults to 256)
    DMultiStringLocator
        ( LList &inStringList
        , int inAlphabetSize = 256
        );

    // Search the buffer starting at inBufferP; inSize gives its size.
    // NOTE(review): the meaning of the int result is not visible in this
    // header; confirm against the implementation file.
    virtual int SearchBuffer ( Uchar *inBufferP, int inSize );

    // Error types, all derived from a common `error` base.
    // NOTE(review): presumably thrown by the implementation; the throw
    // sites are not visible in this header.
    class error {};
    class memerror : public error {};
    class state_table_exceeded : public error {};
    class input_item_NULL : public error {};
    class list_item_NULL : public error {};

    // NOTE(review): looks like a status value that ends a search early;
    // confirm how SearchBuffer uses it.
    enum { searchStatus_stop = -1 };

protected:
    // stop defaults
    // Copy construction, copy assignment and default construction are
    // declared here so that client code cannot use the compiler-generated
    // versions. Copy assignment is suppressed together with the copy
    // constructor because this object holds raw table pointers; a
    // memberwise assignment would leave two locators sharing the same
    // tables, which is unsafe if the destructor releases them (the
    // destructor body is not visible here).
    DMultiStringLocator( const DMultiStringLocator &inOriginal );
    DMultiStringLocator &operator=( const DMultiStringLocator &inOriginal );
    DMultiStringLocator();

    // Add one search string to the state tables.
    void AddString ( DSearchString &inString );

    // Integer type used to number the states of the search machine.
    typedef int stateT;

    // Record a transition from currentState to nextState on matchChar.
    void AddStateTrans
        ( int matchChar
        , stateT currentState
        , stateT nextState
        );

    // Helpers that set up RetryArray (see the member comment below).
    void RetryArrayInit();
    void FindRetryState
        ( int inChar, stateT currentState, stateT nextState );
    void QueueAdd( int *queue, int gbeg, stateT inState );

    stateT mMaxState;       // max space for states
    stateT mHighState;      // track the next free state
    int mAlphabetSize;      // size of BranchTable

    int *MatchArray;        // First level of matching:
    // NOTE(review): these are preprocessor macros, so despite appearing
    // inside the class they are visible to every file that includes this
    // header.
#define BRANCH -1           // match flag: use branch table
#define EMPTY_SLOT -2       // match flag: unused slot
#define FAIL_STATE -1       // for GotoState and BranchTable
#define state_begin 0       // the starting state

    stateT *RetryArray;     // jump to new search word (state) when search fails
    LList *OutArray;        // results objects indexed by end state

    union GotoTable         // destination of MatchArray values
    {
        stateT GotoState;       // destination if MatchArray is char
        stateT *BranchTable;    // else goto this BRANCH branch table
    } *GotoArray;
};
// ===========================================================================